WGCNA GO Analysis

sessionInfo() #provides list of loaded packages and version of R. 
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.35     R6_2.5.1          fastmap_1.1.1     xfun_0.43        
##  [5] cachem_1.0.8      knitr_1.45        htmltools_0.5.8.1 rmarkdown_2.26   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.9        jquerylib_0.1.4  
## [13] compiler_4.3.2    rstudioapi_0.16.0 tools_4.3.2       evaluate_0.23    
## [17] bslib_0.7.0       yaml_2.3.8        rlang_1.1.3       jsonlite_1.8.8
#BiocManager::install("simplifyEnrichment")

First, load the necessary packages.

library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyr)
library(grDevices)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
## The following object is masked from 'package:circlize':
## 
##     degree
## The following object is masked from 'package:plyr':
## 
##     join
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## The following objects are masked from 'package:zoo':
## 
##     yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
library(simplifyEnrichment)
## 
## ========================================
## simplifyEnrichment version 1.10.0
## Bioconductor page: https://bioconductor.org/packages/simplifyEnrichment/
## Github page: https://github.com/jokergoo/simplifyEnrichment
## Documentation: https://jokergoo.github.io/simplifyEnrichment/
## Examples: https://simplifyenrichment.github.io/
## 
## If you use it in published research, please cite:
## Gu, Z. simplifyEnrichment: an R/Bioconductor package for Clustering and 
##   Visualizing Functional Enrichment Results, Genomics, Proteomics & 
##   Bioinformatics 2022.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(simplifyEnrichment))
## ========================================
library(scales)

Load in data

library(rtracklayer)
gff<-rtracklayer::import("../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3")
gff<-as.data.frame(gff) %>% dplyr::select(-Parent)
dim(gff) # 478988     9
## [1] 478988     12
names(gff) 
##  [1] "seqnames"      "start"         "end"           "width"        
##  [5] "strand"        "source"        "type"          "score"        
##  [9] "phase"         "ID"            "transcript_id" "gene_id"
transcripts <- subset(gff, type == "transcript")
transcripts_gr <- makeGRangesFromDataFrame(transcripts, keep.extra.columns=TRUE) #extract length information
transcript_lengths <- width(transcripts_gr) #isolate length of each gene
seqnames<-transcripts_gr$ID #extract list of gene id 
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame

dim(transcripts) #33730    13
## [1] 33730    12
kegg <- read.delim("../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt",header = FALSE)
kegg <- as.data.frame(kegg)
colnames(kegg)[1] <- "gene_id" 
colnames(kegg)[2] <- "KEGG_new"
head(kegg)
##                                      gene_id KEGG_new
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a         
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b   K22584
## 3  Pocillopora_acuta_HIv2___RNAseq.g22918.t1         
## 4  Pocillopora_acuta_HIv2___RNAseq.g18333.t1   K03386
## 5   Pocillopora_acuta_HIv2___RNAseq.g7985.t1         
## 6  Pocillopora_acuta_HIv2___RNAseq.g13511.t1
eggnog<-read.delim("../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt")#this file contains all of the go terms, descriptions, kegg, etc
eggnog<- plyr::rename(eggnog, c("X.query"="gene_id"))
head(eggnog,2)
##                                      gene_id  seed_ortholog    evalue score
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a 45351.EDO48725 2.16e-120   364
## 2  Pocillopora_acuta_HIv2___RNAseq.g18333.t1 45351.EDO38694 3.18e-123   355
##                                                                           eggNOG_OGs
## 1 COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2 COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
##   max_annot_lvl COG_category
## 1 33208|Metazoa            E
## 2 33208|Metazoa            O
##                                           Description Preferred_name
## 1    Cobalamin-independent synthase, Catalytic domain              -
## 2 negative regulation of male germ cell proliferation          PRDX4
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
##          EC   KEGG_ko
## 1  2.1.1.14 ko:K00549
## 2 1.11.1.15 ko:K03386
##                                                                           KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2                                                                     ko04214,map04214
##   KEGG_Module KEGG_Reaction             KEGG_rclass
## 1      M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2           -             -                       -
##                             BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000       -    -             -
## 2 ko00000,ko00001,ko01000,ko04147       -    -             -
##                 PFAMs
## 1         Meth_synt_2
## 2 1-cysPrx_C,AhpC-TSA
gogene <- merge(transcripts, eggnog, by=c("gene_id"), all=T)
gogene <- merge(gogene, kegg, by=c("gene_id"), all=T)
head(gogene,2)
##                                   gene_id                           seqnames
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t Pocillopora_acuta_HIv2___Sc0000013
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t Pocillopora_acuta_HIv2___Sc0000013
##     start     end width strand   source       type score.x phase
## 1 4542087 4551503  9417      + AUGUSTUS transcript      NA    NA
## 2 4639103 4647350  8248      + AUGUSTUS transcript      NA    NA
##                                        ID
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t
##                             transcript_id       seed_ortholog   evalue score.y
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t      45351.EDO27354 2.41e-93     317
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t 6087.XP_002166004.2 1.28e-38     164
##                                                                           eggNOG_OGs
## 1 COG0666@1|root,KOG0510@2759|Eukaryota,38G7Q@33154|Opisthokonta,3BCDU@33208|Metazoa
## 2                                              COG0666@1|root,KOG4177@2759|Eukaryota
##    max_annot_lvl COG_category                                Description
## 1  33208|Metazoa           DZ osmolarity-sensing cation channel activity
## 2 2759|Eukaryota            I                           spectrin binding
##   Preferred_name
## 1          TRPA1
## 2              -
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1 GO:0000302,GO:0001580,GO:0002791,GO:0002793,GO:0003008,GO:0003012,GO:0003674,GO:0004888,GO:0005034,GO:0005215,GO:0005216,GO:0005217,GO:0005244,GO:0005245,GO:0005261,GO:0005262,GO:0005488,GO:0005515,GO:0005575,GO:0005623,GO:0005886,GO:0005887,GO:0006810,GO:0006811,GO:0006812,GO:0006816,GO:0006873,GO:0006874,GO:0006875,GO:0006936,GO:0006939,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007166,GO:0007204,GO:0007600,GO:0007602,GO:0007606,GO:0007610,GO:0007638,GO:0008150,GO:0008324,GO:0009266,GO:0009314,GO:0009408,GO:0009409,GO:0009410,GO:0009416,GO:0009453,GO:0009581,GO:0009582,GO:0009583,GO:0009593,GO:0009605,GO:0009612,GO:0009628,GO:0009636,GO:0009719,GO:0009966,GO:0009967,GO:0009987,GO:0010033,GO:0010035,GO:0010037,GO:0010243,GO:0010378,GO:0010646,GO:0010647,GO:0010817,GO:0014070,GO:0014074,GO:0014832,GO:0014848,GO:0015075,GO:0015085,GO:0015267,GO:0015276,GO:0015278,GO:0015318,GO:0016020,GO:0016021,GO:0016043,GO:0016048,GO:0016324,GO:0019233,GO:0019722,GO:0019725,GO:0019932,GO:0022607,GO:0022803,GO:0022832,GO:0022834,GO:0022836,GO:0022838,GO:0022839,GO:0022843,GO:0022857,GO:0022890,GO:0023041,GO:0023051,GO:0023052,GO:0023056,GO:0030001,GO:0030003,GO:0030424,GO:0031000,GO:0031224,GO:0031226,GO:0031644,GO:0031646,GO:0032024,GO:0032421,GO:0032501,GO:0032879,GO:0032880,GO:0032991,GO:0033554,GO:0033555,GO:0034220,GO:0034605,GO:0034702,GO:0034703,GO:0035556,GO:0035690,GO:0035774,GO:0036270,GO:0038023,GO:0040011,GO:0040040,GO:0042221,GO:0042330,GO:0042331,GO:0042391,GO:0042493,GO:0042542,GO:0042592,GO:0042752,GO:0042802,GO:0042995,GO:0043005,GO:0043052,GO:0043269,GO:0043270,GO:0043279,GO:0043933,GO:0044057,GO:0044070,GO:0044085,GO:0044425,GO:0044459,GO:0044464,GO:0045177,GO:0046677,GO:0046873,GO:0046883,GO:0046887,GO:0046957,GO:0048265,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048878,GO:0050708,GO:0050714,GO:0050789,GO:0050794,GO:0050796,GO:0050801,GO:0050848,GO:0050850,GO:0050877,GO:0050896,GO:0050906,GO:0050907,GO:0050909,GO:0050912,GO:0050913,GO:0050951,GO:0050954,GO:0050955,GO:0050960,GO:0050961,GO:0050965,GO:0050966,GO:0050968,GO:0050974,GO:0050982,GO:0051046,GO:0051047,GO:0051049,GO:0051050,GO:0051179,GO:0051209,GO:0051222,GO:0051223,GO:0051234,GO:0051239,GO:0051240,GO:0051259,GO:0051260,GO:0051262,GO:0051282,GO:0051283,GO:0051289,GO:0051480,GO:0051606,GO:0051641,GO:0051649,GO:0051716,GO:0051930,GO:0051931,GO:0051969,GO:0052129,GO:0055065,GO:0055074,GO:0055080,GO:0055082,GO:0055085,GO:0060089,GO:0060341,GO:0060401,GO:0060402,GO:0061178,GO:0065003,GO:0065007,GO:0065008,GO:0070201,GO:0070417,GO:0070588,GO:0070838,GO:0070887,GO:0071241,GO:0071244,GO:0071310,GO:0071312,GO:0071313,GO:0071407,GO:0071415,GO:0071417,GO:0071466,GO:0071495,GO:0071840,GO:0071944,GO:0072347,GO:0072503,GO:0072507,GO:0072511,GO:0090087,GO:0090276,GO:0090277,GO:0097458,GO:0097553,GO:0097603,GO:0097604,GO:0098590,GO:0098655,GO:0098660,GO:0098662,GO:0098771,GO:0098796,GO:0098862,GO:0098900,GO:0098908,GO:0099094,GO:0099604,GO:0120025,GO:1901698,GO:1901699,GO:1901700,GO:1901701,GO:1902495,GO:1902531,GO:1902533,GO:1903522,GO:1903530,GO:1903532,GO:1903793,GO:1904058,GO:1904951,GO:1990351,GO:1990760
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
##   EC   KEGG_ko     KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass
## 1  - ko:K04984 ko04750,map04750           -             -           -
## 2  -         -                -           -             -           -
##                     BRITE                                 KEGG_TC CAZy
## 1 ko00000,ko00001,ko04040 1.A.4.6.1,1.A.4.6.2,1.A.4.6.3,1.A.4.6.5    -
## 2                       -                                       -    -
##   BiGG_Reaction                           PFAMs KEGG_new
## 1             - Ank,Ank_2,Ank_3,Ank_4,Ion_trans   K04984
## 2             - Ank,Ank_2,Ank_4,Ank_5,Ion_trans   K04984
dim(gogene)
## [1] 33730    33
geneInfo <- read.csv("../../output/WGCNA/WGCNA_ModuleMembership.csv") #this file was generated from the WGCNA analyses and contains the modules of interest
geneInfo<- plyr::rename(geneInfo, c("X"="gene_id"))
dim(geneInfo) # there are 9012 genes in our gene info file
## [1] 9012   40
geneInfo <- merge(gogene, geneInfo, by=c("gene_id")) #merging the GO and Kegg info to module membership for the 9012 genes

Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in GOs column

geneInfo$GOs <- gsub(",", ";", geneInfo$GOs)
geneInfo$GOs <- gsub('"', "", geneInfo$GOs)
geneInfo$GOs <- gsub("-", NA, geneInfo$GOs)

geneInfo$KEGG_new[geneInfo$KEGG_new == ""] <- NA
unique(geneInfo$moduleColor)
##  [1] "green"        "blue"         "salmon"       "turquoise"    "yellow"      
##  [6] "black"        "red"          "magenta"      "lightcyan"    "purple"      
## [11] "brown"        "pink"         "midnightblue" "tan"          "cyan"
geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)]
dim(geneInfo)
## [1] 9012   73
write.csv(geneInfo, file = "../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv") #gene info for reference/supplement

Modules

calc_up_mods <- c("brown", "red", "black", "pink", "salmon", "blue")

nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")) #942
## [1] 942
nrow(geneInfo %>% dplyr::filter(moduleColor=="red")) #425
## [1] 425
nrow(geneInfo %>% filter(moduleColor=="black")) #396
## [1] 396
nrow(geneInfo %>% filter(moduleColor=="pink")) #220
## [1] 220
nrow(geneInfo %>% filter(moduleColor=="salmon")) #154
## [1] 154
nrow(geneInfo %>% filter(moduleColor=="blue")) #1989
## [1] 1989
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")), nrow(geneInfo %>% dplyr::filter(moduleColor=="red")), nrow(geneInfo %>% filter(moduleColor=="black")), nrow(geneInfo %>% filter(moduleColor=="pink")), nrow(geneInfo %>% filter(moduleColor=="salmon")), nrow(geneInfo %>% filter(moduleColor=="blue")))
## [1] 4126
# 4126

calc_down_mods <- c("turquoise","magenta","lightcyan")

nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")) #2558
## [1] 2558
nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")) #219
## [1] 219
nrow(geneInfo %>% filter(moduleColor=="lightcyan")) #65
## [1] 65
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")), nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")), nrow(geneInfo %>% filter(moduleColor=="lightcyan")))
## [1] 2842
# 2842

other_mods <- c("green","yellow", "purple", "midnightblue","cyan","tan")

sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="green")), nrow(geneInfo %>% dplyr::filter(moduleColor=="yellow")), nrow(geneInfo %>% filter(moduleColor=="purple")), nrow(geneInfo %>% filter(moduleColor=="midnightblue")), nrow(geneInfo %>% filter(moduleColor=="cyan")),nrow(geneInfo %>% filter(moduleColor=="tan")))
## [1] 2044
# 2044

# 4126 + 2842 + 2044 = 9012, which represents all of our genes

Calcification - Up

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

4126 genes are in the 6 modules significantly upregulated by calcification.

### Generate vector with names in just the module we are analyzing
# ID.vector <- geneInfo %>%
#   filter(moduleColor==c("brown", "red", "black", "pink", "salmon", "green")) %>%
#   #get_rows(.data[[module]]))%>%
#   pull(gene_id)

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("brown", "red", "black", "pink", "salmon", "blue")) %>%
  pull(gene_id)

length(ID.vector) #4126
## [1] 4126
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

Run the GOSeq function by module color to test for GO term enrichment. Due to high number of enriched GO terms, I am using a p-value threshold of <0.05 and using rrvgo package to reduce redundancy in list of GO terms.

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
GO_05 <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
go_results_BP <- GO_05 %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 654   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0050794            8.436918e-08                1.0000000       1395
## 2 2 GO:0065007            1.553493e-06                0.9999989       1574
## 3 3 GO:0050789            1.584882e-06                0.9999989       1479
## 4 4 GO:0048523            1.581326e-05                0.9999878        800
## 5 5 GO:0048518            3.043369e-05                0.9999758        950
## 6 6 GO:0051336            3.403563e-05                0.9999783        232
##   numInCat                                      term ontology
## 1     2785            regulation of cellular process       BP
## 2     3184                     biological regulation       BP
## 3     2981          regulation of biological process       BP
## 4     1569   negative regulation of cellular process       BP
## 5     1888 positive regulation of biological process       BP
## 6      415          regulation of hydrolase activity       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")

#compare_clustering_methods(Mat, plot_type = "heatmap")

pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 654 terms by 'binary_cut'... 184 clusters, used 2.056766 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen 
##                 2
CalcUpMods_GO_P05 <- go_results_BP %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcUpMods_GO_P05

ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05.pdf", CalcUpMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 508  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 73

The reduced list of terms is 508 terms that falls under 73 parent terms.

write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")

Plot reduced terms

#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_calcification <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_calcification

ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification.pdf", plot=GO.plot_calcification, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcUpMods_GO_P05_reduced <- go_results_BP_reduced %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcUpMods_GO_P05_reduced

ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05_reduced.pdf", CalcUpMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)

Calcification - Up - By Module

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

Brown

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("brown")) %>%
  pull(gene_id)

length(ID.vector) #942
## [1] 942
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector #set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_brown <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Brown")

go_results_BP <- GO_05_brown %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 206   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Brown Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Brown.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_brown.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50),  min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 206 terms by 'binary_cut'... 73 clusters, used 0.3661621 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 11 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Red

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("red")) %>%
  pull(gene_id)

length(ID.vector) #425
## [1] 425
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_red <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Red")

go_results_BP <- GO_05_red %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 209   8
#Write file of results 
write.csv(GO_05_red, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_red.csv")
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
  
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Red Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Red.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_red.pdf", width = 7, height = 5)
result <-  simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50),  min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 209 terms by 'binary_cut'... 147 clusters, used 1.343836 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 9 GO lists...
result
##             id cluster
## 1   GO:0035725       1
## 2   GO:0060075       2
## 3   GO:0031943       3
## 4   GO:0031944       4
## 5   GO:0031946       5
## 6   GO:0031947       6
## 7   GO:0006814       1
## 8   GO:0051941       7
## 9   GO:0051946       8
## 10  GO:1903280       9
## 11  GO:1903416      10
## 12  GO:0090032      11
## 13  GO:0060439      12
## 14  GO:0046719      13
## 15  GO:0046726      14
## 16  GO:0000707      15
## 17  GO:0000730      15
## 18  GO:0042148      15
## 19  GO:0090735      16
## 20  GO:0010894      17
## 21  GO:0086012      18
## 22  GO:0032351      19
## 23  GO:0032353      20
## 24  GO:0070212      21
## 25  GO:0090030      22
## 26  GO:0002087      23
## 27  GO:0030007      24
## 28  GO:0036376       1
## 29  GO:1903279      25
## 30  GO:0007630      26
## 31  GO:1902808      27
## 32  GO:0032287      28
## 33  GO:0046466      29
## 34  GO:0002028       1
## 35  GO:0086010      30
## 36  GO:0045823      31
## 37  GO:0000724      15
## 38  GO:0000725      15
## 39  GO:1901992      27
## 40  GO:0035158      32
## 41  GO:0044065      33
## 42  GO:0140115       1
## 43  GO:1990573       1
## 44  GO:0015701      34
## 45  GO:0045939      35
## 46  GO:0051890      36
## 47  GO:0086064      37
## 48  GO:0051055      38
## 49  GO:0098655       1
## 50  GO:1900087      27
## 51  GO:2000617      39
## 52  GO:0019377      40
## 53  GO:0007143      27
## 54  GO:0045988      41
## 55  GO:1903053      16
## 56  GO:0055119      42
## 57  GO:1901989      27
## 58  GO:0043217      43
## 59  GO:0030509      44
## 60  GO:0032042      15
## 61  GO:0071805       1
## 62  GO:0045003      15
## 63  GO:2001137      45
## 64  GO:0010248       1
## 65  GO:0002036      46
## 66  GO:0010958      47
## 67  GO:0002807      48
## 68  GO:1904357      15
## 69  GO:0046498      40
## 70  GO:0035562      49
## 71  GO:2000615      50
## 72  GO:0086004      51
## 73  GO:1903115      52
## 74  GO:0090075      53
## 75  GO:0086036      54
## 76  GO:0006883      24
## 77  GO:0098662       1
## 78  GO:0030720      55
## 79  GO:0046128      40
## 80  GO:0060438      56
## 81  GO:0030206      40
## 82  GO:1904646      57
## 83  GO:1902600       1
## 84  GO:0071772      44
## 85  GO:0071773      44
## 86  GO:0007400      58
## 87  GO:1903055      59
## 88  GO:1903789      60
## 89  GO:0006813       1
## 90  GO:0098660       1
## 91  GO:0035203      61
## 92  GO:0035204      62
## 93  GO:0045611      63
## 94  GO:0045613      64
## 95  GO:0045614      65
## 96  GO:0046885      66
## 97  GO:0060081      67
## 98  GO:1902476       1
## 99  GO:0042278      40
## 100 GO:0006023      40
## 101 GO:0045143      27
## 102 GO:0051899      30
## 103 GO:0030510      44
## 104 GO:1901879      16
## 105 GO:0045623      68
## 106 GO:0035172      69
## 107 GO:0006821       1
## 108 GO:1904645      70
## 109 GO:0035066      71
## 110 GO:0000038      40
## 111 GO:1901657      40
## 112 GO:0060391      72
## 113 GO:0051580      30
## 114 GO:0043696      73
## 115 GO:0043697      74
## 116 GO:0006302      15
## 117 GO:1990535      75
## 118 GO:0055075      24
## 119 GO:0048857      76
## 120 GO:0060413      77
## 121 GO:0007604      78
## 122 GO:1990146      79
## 123 GO:1904762      80
## 124 GO:0002213      81
## 125 GO:0009608      82
## 126 GO:0035163      83
## 127 GO:0035165      84
## 128 GO:0035622      85
## 129 GO:0042480      86
## 130 GO:0045316      87
## 131 GO:0045967      88
## 132 GO:0061073      89
## 133 GO:0061331      90
## 134 GO:0072574      91
## 135 GO:0072575      92
## 136 GO:1990705      93
## 137 GO:0051758      94
## 138 GO:0070649      95
## 139 GO:0110009      96
## 140 GO:0070213      97
## 141 GO:0003198      98
## 142 GO:0003220      99
## 143 GO:0003251     100
## 144 GO:0003348     101
## 145 GO:0060956     102
## 146 GO:0061040     103
## 147 GO:1905305     104
## 148 GO:0021550     105
## 149 GO:0021744     106
## 150 GO:0035750     107
## 151 GO:0035177     108
## 152 GO:0002143     109
## 153 GO:0032447      21
## 154 GO:0098822     110
## 155 GO:0045989      41
## 156 GO:0006432      40
## 157 GO:0060047     111
## 158 GO:0010644     112
## 159 GO:0021934     113
## 160 GO:0021935     114
## 161 GO:1902816     115
## 162 GO:1902817     116
## 163 GO:1904527     117
## 164 GO:0030497      40
## 165 GO:0042761      40
## 166 GO:0007127      27
## 167 GO:1903697     118
## 168 GO:1904441     119
## 169 GO:1904442     120
## 170 GO:1990790     121
## 171 GO:1990792     122
## 172 GO:0070407     123
## 173 GO:0043576     124
## 174 GO:0035065      71
## 175 GO:0010766       1
## 176 GO:1902306       1
## 177 GO:2000650       1
## 178 GO:0043371     125
## 179 GO:2000515     126
## 180 GO:0038166     127
## 181 GO:0048867      58
## 182 GO:0031335     128
## 183 GO:0050666     129
## 184 GO:0016233      15
## 185 GO:0002041     130
## 186 GO:0002044     131
## 187 GO:0098661       1
## 188 GO:1903976     132
## 189 GO:1904464     133
## 190 GO:1904465     134
## 191 GO:0031573      27
## 192 GO:0046666     135
## 193 GO:2000758      71
## 194 GO:0060390     136
## 195 GO:0050650      40
## 196 GO:0086009      30
## 197 GO:0086011      30
## 198 GO:0086013     137
## 199 GO:0099622     138
## 200 GO:0003283     139
## 201 GO:0006024      40
## 202 GO:0022011     140
## 203 GO:0032292     141
## 204 GO:0000002     142
## 205 GO:0045822     143
## 206 GO:0003015     144
## 207 GO:0043353     145
## 208 GO:0048822     146
## 209 GO:0045622     147
dev.off()
## quartz_off_screen 
##                 2
# Following filtering of small clusters is based on the simplifyGO/ht_clusters source code for merging the small clusters together

# Filter clusters smaller than 3
cluster_sizes <- table(result$cluster) # table function makes a table with the size of each cluster (freqeuncy) 
large_clusters <- as.numeric(names(cluster_sizes)[cluster_sizes >= 3]) # Identify clusters with frequenct/size less than 3
filtered_indices <- result$cluster %in% large_clusters # Filter out the indices belonging to small clusters
filtered_Mat <- Mat[filtered_indices, filtered_indices] # Create a new matrix with only the indicies from larger clusters

pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_red_filtered.pdf", width = 7, height = 5)
simplifyGO(filtered_Mat, word_cloud_grob_param = list(max_width = 50),  min_term = 1, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 68 terms by 'binary_cut'... 8 clusters, used 0.05846906 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Black

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("black")) %>%
  pull(gene_id)

length(ID.vector) #396
## [1] 396
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_black <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Black")

go_results_BP <- GO_05_black %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 395   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Black Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Black.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_black.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 395 terms by 'binary_cut'... 281 clusters, used 3.360521 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Pink

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("pink")) %>%
  pull(gene_id)

length(ID.vector) #220
## [1] 220
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_pink <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Pink")

go_results_BP <- GO_05_pink %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 234   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Pink Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() +
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Pink.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_pink.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 234 terms by 'binary_cut'... 125 clusters, used 0.6260972 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Salmon

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("salmon")) %>%
  pull(gene_id)

length(ID.vector) #154
## [1] 154
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_salmon <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Salmon")

go_results_BP <- GO_05_salmon %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 198   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Salmon Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Salmon.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_salmon.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 198 terms by 'binary_cut'... 79 clusters, used 0.9675319 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 10 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Blue

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("blue")) %>%
  pull(gene_id)

length(ID.vector) #1989
## [1] 1989
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_blue <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Blue")

go_results_BP <- GO_05_blue %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 1530    8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Blue Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Blue.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_blue.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50))
## Cluster 1530 terms by 'binary_cut'... 419 clusters, used 16.64123 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 7 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Combining all the individual module lists for Calcification - Up

GO_05_UP <- rbind(GO_05_brown,GO_05_red,GO_05_black,GO_05_pink,GO_05_salmon,GO_05_blue)

length(GO_05_UP$GOterm) #3810 enriched GO terms
## [1] 3810
length(unique(GO_05_UP$GOterm)) #3617 enriched GO terms
## [1] 3617
# Collapse GO terms that are duplicated accross modules - sort by GO term and p-value, then keep the row with the lowest p-value, collapsing the module names so you know which modules were represented

# Identify and collapse duplicates
GO_05_UP <- GO_05_UP %>%
  arrange(GOterm, over_represented_pvalue) %>%
  group_by(GOterm) %>% mutate(Module = paste(unique(Module), collapse = ";")) %>%
  group_by(GOterm) %>%
  slice_min(order_by = over_represented_pvalue, n = 1) %>%
  ungroup()

length(GO_05_UP$GOterm) #3617 enriched GO terms
## [1] 3617
length(unique(GO_05_UP$GOterm)) #3617 enriched GO terms, all are unique
## [1] 3617
#Write file of results 
write.csv(GO_05_UP, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv")

Calcification - Down

2842 genes are in the 4 modules significantly downregulated by calcification.

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)
### Generate vector with names in just the module we are analyzing
ID.vector <- geneInfo%>%
  filter(moduleColor %in% c("turquoise","magenta","lightcyan"))%>%
  pull(gene_id)

length(ID.vector) #2842
## [1] 2842
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

### Run the GOSeq function by module color to test for GO term enrichment. Due to high number of enriched GO terms, I am using a p-value threshold of <0.05 and using rrvgo package to reduce redundancy in list of GO terms.

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results 
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
GO_05 <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
go_results_BP <- GO_05 %>%
      filter(ontology=="BP") %>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat > 10) %>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 298   8
head(go_results_BP)
##   X     GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0002181            8.295294e-15                        1         67
## 2 2 GO:0006614            2.463649e-13                        1         48
## 3 3 GO:0006412            3.096109e-13                        1        123
## 4 4 GO:0043043            1.598313e-12                        1        126
## 5 5 GO:0006613            2.273026e-12                        1         49
## 6 6 GO:0006518            7.543937e-12                        1        146
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 3      219                                                 translation       BP
## 4      231                                peptide biosynthetic process       BP
## 5       63               cotranslational protein targeting to membrane       BP
## 6      285                                   peptide metabolic process       BP

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 298 terms by 'binary_cut'... 87 clusters, used 0.6110082 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 4 GO lists...
dev.off()
## quartz_off_screen 
##                 2
CalcDownMods_GO_P05 <- go_results_BP %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcDownMods_GO_P05

ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05.pdf", CalcDownMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
 #calculate similarity 
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 226  10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_reduced$ParentTerm))
## [1] 38

The reduced list of terms is 226 terms that falls under 38 parent terms.

write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term 
 GO.plot_calcification_down <-  ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) + 
    geom_point(aes(size=over_represented_pvalue)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
GO.plot_calcification_down

ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification_down.pdf", plot=GO.plot_calcification_down, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcDownMods_GO_P05_reduced <- go_results_BP_reduced %>% 
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=1, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 3,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
                    axis.text.y = element_text(size = 2), #set y axis labels size
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

CalcDownMods_GO_P05_reduced

ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05_reduced.pdf", CalcDownMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)

Calcification - Down - By Module

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes 
LENGTH.vector <- as.integer(geneInfo$Length)

Turquoise

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("turquoise")) %>%
  pull(gene_id)

length(ID.vector) #2558
## [1] 2558
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector #set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_turquoise <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Turquoise")

go_results_BP <- GO_05_turquoise %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 382   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Turquoise Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() +
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Turquoise.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_turquoise.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 382 terms by 'binary_cut'... 115 clusters, used 1.127246 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Magenta

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("magenta")) %>%
  pull(gene_id)

length(ID.vector) #219
## [1] 219
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_magenta <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Magenta")

go_results_BP <- GO_05_magenta %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 210   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Magenta Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Magenta.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_magenta.pdf", width = 7, height = 5)
result <-  simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50),  min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 210 terms by 'binary_cut'... 147 clusters, used 0.8285391 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 7 GO lists...
result
##             id cluster
## 1   GO:0019722       1
## 2   GO:0007267       1
## 3   GO:0033292       2
## 4   GO:0070296       3
## 5   GO:0017158       4
## 6   GO:0008584       5
## 7   GO:0046546       5
## 8   GO:0017157       4
## 9   GO:0014733       6
## 10  GO:0014883       7
## 11  GO:0086015       8
## 12  GO:0086018       9
## 13  GO:0086070      10
## 14  GO:0007268       1
## 15  GO:0098916       1
## 16  GO:0002027      11
## 17  GO:0099537       1
## 18  GO:0099536       1
## 19  GO:0071286      12
## 20  GO:1903305       4
## 21  GO:2000833      13
## 22  GO:0019932       1
## 23  GO:0045838      14
## 24  GO:0086014      15
## 25  GO:0086019      16
## 26  GO:0086026      17
## 27  GO:0086066      18
## 28  GO:0007436      19
## 29  GO:0086091      20
## 30  GO:0098901      21
## 31  GO:1903530       4
## 32  GO:0006376      22
## 33  GO:0051560      23
## 34  GO:0099623      24
## 35  GO:1901018       4
## 36  GO:0030001       4
## 37  GO:0006816       4
## 38  GO:0055074      23
## 39  GO:0086036      25
## 40  GO:0032471      26
## 41  GO:0046883       4
## 42  GO:1905954       4
## 43  GO:0090649      27
## 44  GO:0090650      28
## 45  GO:0086004      29
## 46  GO:1903115      30
## 47  GO:2000831      31
## 48  GO:0051561      23
## 49  GO:0051046       4
## 50  GO:0035260      32
## 51  GO:2000834      33
## 52  GO:2000836      34
## 53  GO:0033563       5
## 54  GO:0021858      35
## 55  GO:0048581       5
## 56  GO:0002168      36
## 57  GO:0060561      37
## 58  GO:1903553      38
## 59  GO:0010960      23
## 60  GO:0050906       5
## 61  GO:0045955      39
## 62  GO:0097711      40
## 63  GO:0010817      14
## 64  GO:0001915      41
## 65  GO:0010615      42
## 66  GO:0014744      43
## 67  GO:1903244      44
## 68  GO:1903799      22
## 69  GO:0014059       4
## 70  GO:0010862       1
## 71  GO:0002381      45
## 72  GO:1901381       4
## 73  GO:1903859       5
## 74  GO:1903861       5
## 75  GO:0008016      46
## 76  GO:0048791       4
## 77  GO:0032026      47
## 78  GO:1903532       4
## 79  GO:0086065      48
## 80  GO:0008406       5
## 81  GO:0045137       5
## 82  GO:0044060      49
## 83  GO:0000389      22
## 84  GO:0061067       5
## 85  GO:0032370       4
## 86  GO:0051412      50
## 87  GO:0019228      14
## 88  GO:0060306      51
## 89  GO:0003299      52
## 90  GO:0014887      53
## 91  GO:0014898      54
## 92  GO:0007619      55
## 93  GO:0006874      23
## 94  GO:0010543      56
## 95  GO:0009584      57
## 96  GO:0043972      58
## 97  GO:0042713       5
## 98  GO:0043084      59
## 99  GO:0060011      60
## 100 GO:0051047       4
## 101 GO:0043379      61
## 102 GO:0090713      62
## 103 GO:0090715      63
## 104 GO:1902349      64
## 105 GO:1902350      65
## 106 GO:1903939      66
## 107 GO:1904515      67
## 108 GO:1903306       4
## 109 GO:0019082      68
## 110 GO:0032804      69
## 111 GO:0032898      70
## 112 GO:0032902      71
## 113 GO:0032911      72
## 114 GO:0071635      73
## 115 GO:0090472      74
## 116 GO:0006812       4
## 117 GO:0002159      75
## 118 GO:0018894      76
## 119 GO:0030845      77
## 120 GO:0032425      78
## 121 GO:0042488      79
## 122 GO:0050930      80
## 123 GO:1990911      81
## 124 GO:2000705      82
## 125 GO:2000707      83
## 126 GO:0018401      84
## 127 GO:0019511      84
## 128 GO:1905289      85
## 129 GO:1905290      86
## 130 GO:0016331       5
## 131 GO:0036124      84
## 132 GO:0031446      87
## 133 GO:0031448      88
## 134 GO:0032470      89
## 135 GO:0051659      90
## 136 GO:0090076      91
## 137 GO:0098909      92
## 138 GO:1901896      93
## 139 GO:1903515      94
## 140 GO:1990036      95
## 141 GO:0007043      22
## 142 GO:0032274      96
## 143 GO:0032275      97
## 144 GO:0006813       4
## 145 GO:0045822      98
## 146 GO:0032469      23
## 147 GO:0050853      99
## 148 GO:0050962     100
## 149 GO:0051567      84
## 150 GO:0002710     101
## 151 GO:0051050       4
## 152 GO:0007435     102
## 153 GO:0030217     103
## 154 GO:1903779     104
## 155 GO:0034105     105
## 156 GO:0086002     106
## 157 GO:0034764       4
## 158 GO:0010650     107
## 159 GO:0034112     108
## 160 GO:0036309     109
## 161 GO:0036371     110
## 162 GO:0086046     111
## 163 GO:0098904     112
## 164 GO:0098907     113
## 165 GO:0098910     114
## 166 GO:1900827     115
## 167 GO:0042492     116
## 168 GO:0046629     117
## 169 GO:1990134     118
## 170 GO:0034110     119
## 171 GO:0035561     120
## 172 GO:0014888     121
## 173 GO:0032414       4
## 174 GO:0051385     122
## 175 GO:0001803     123
## 176 GO:0001805     124
## 177 GO:0001810     125
## 178 GO:0001812     126
## 179 GO:0002343     127
## 180 GO:0002344     128
## 181 GO:0002721     129
## 182 GO:0002883     130
## 183 GO:0002885     131
## 184 GO:0046960     132
## 185 GO:0061337     133
## 186 GO:0060047     134
## 187 GO:0002709     135
## 188 GO:0046958     136
## 189 GO:0040018       5
## 190 GO:0048312      90
## 191 GO:0060393     137
## 192 GO:0010882     138
## 193 GO:0046661       5
## 194 GO:0001666      57
## 195 GO:1901021       4
## 196 GO:0086003     139
## 197 GO:0061647      84
## 198 GO:0006937       5
## 199 GO:0086010      14
## 200 GO:0043267       4
## 201 GO:0032803     140
## 202 GO:0045478     141
## 203 GO:0006407     142
## 204 GO:0050848       1
## 205 GO:0001911     143
## 206 GO:0031342     144
## 207 GO:0046887       4
## 208 GO:0070884     145
## 209 GO:0106056     146
## 210 GO:0003015     147
dev.off()
## quartz_off_screen 
##                 2
# Following filtering of small clusters is based on the simplifyGO/ht_clusters source code for merging the small clusters together

# Filter clusters smaller than 3
cluster_sizes <- table(result$cluster) # table function makes a table with the size of each cluster (freqeuncy) 
large_clusters <- as.numeric(names(cluster_sizes)[cluster_sizes >= 3]) # Identify clusters with frequenct/size less than 3
filtered_indices <- result$cluster %in% large_clusters # Filter out the indices belonging to small clusters
filtered_Mat <- Mat[filtered_indices, filtered_indices] # Create a new matrix with only the indicies from larger clusters


pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_magenta_filtered.pdf", width = 7, height = 5)
simplifyGO(filtered_Mat, word_cloud_grob_param = list(max_width = 50),  min_term = 1, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 68 terms by 'binary_cut'... 9 clusters, used 0.04797482 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 9 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Lightcyan

ID.vector <- geneInfo %>%
  filter(moduleColor %in% c("lightcyan")) %>%
  pull(gene_id)

length(ID.vector) #65
## [1] 65
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
  dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")

dim(GO.terms) #9012    2
## [1] 9012    2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";") 
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split)) 
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2

##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector) 
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene 
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector) 
## Warning in pcls(G): initial point very close to some inequality constraints

#run goseq using Wallenius method for all categories of GO terms 
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_lightcyan <- GO %>%
        dplyr::filter(over_represented_pvalue<0.05) %>%
        dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Lightcyan")

go_results_BP <- GO_05_lightcyan %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)

dim(go_results_BP)
## [1] 129   8
Top10Plot <- go_results_BP %>% 
    top_n(10, wt=-over_represented_pvalue) %>% 
    mutate(hitsPerc=numDEInCat/numInCat) %>% 
    ggplot(aes(x=hitsPerc, 
               y=reorder(term, -hitsPerc), 
               colour=over_represented_pvalue, 
               size=numDEInCat)) +
        geom_point() +
        scale_colour_gradient(low = "red", high = "blue", name = "p value",guide=guide_colorbar(reverse=TRUE)) + 
        expand_limits(x=0) +
        scale_y_discrete(labels = label_wrap(40)) + 
        labs(title = "Top 10 Enriched Biological Process GO Terms, Lightcyan Module", x="Proportion of Genes in Module per GO Term", y="GO term", colour="p value", size="Count") +
        theme_bw() + 
        theme(axis.text.y = element_text(size = 8), 
              axis.title = element_text(size = 10), 
              plot.title = element_text(hjust = 0.5, size = 10)) 

Top10Plot

ggsave("../../output/WGCNA/GO_analysis/Top10_BP_by_mod/Lightcyan.pdf", Top10Plot, width = 5, height = 5, units = c("in"), dpi=300, limitsize=FALSE)

Collapsing with simplifyEnrichment package

library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm

Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_lightcyan.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3, exclude_words = c("regulation", "process"), order_by_size = TRUE)
## Cluster 129 terms by 'binary_cut'... 82 clusters, used 0.3009071 secs.
## 'magick' package is suggested to install to give better rasterization.
## 
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen 
##                 2

Combining all the individual module lists for Calcification - Down

GO_05_DOWN <- rbind(GO_05_turquoise,GO_05_magenta,GO_05_lightcyan)

length(GO_05_DOWN$GOterm) #1072 enriched GO terms
## [1] 1072
length(unique(GO_05_DOWN$GOterm)) #1070 enriched GO terms
## [1] 1070
# Collapse GO terms that are duplicated accross modules - sort by GO term and p-value, then keep the row with the lowest p-value, collapsing the module names so you know which modules were represented

# Identify and collapse duplicates
GO_05_DOWN <- GO_05_DOWN %>%
  arrange(GOterm, over_represented_pvalue) %>%
  group_by(GOterm) %>% mutate(Module = paste(unique(Module), collapse = ";")) %>%
  group_by(GOterm) %>%
  slice_min(order_by = over_represented_pvalue, n = 1) %>%
  ungroup()

#Write file of results 
write.csv(GO_05_DOWN, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv")

Merge the frequency of GOterms for both up- and down-regulation of calcification

Reduce the lists together

# GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
# GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")

GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 2647    8
go_results_BP_down <- GO_05_down %>%
      filter(ontology=="BP")%>%
      filter(over_represented_pvalue != "NA") %>%
      #filter(numInCat>10)%>%
      arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 721   8
all_enriched_terms_up_down <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm)
length(all_enriched_terms_up_down)
## [1] 3368
length(unique(all_enriched_terms_up_down))
## [1] 3217

By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules.

library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package 
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down),
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont="BP",
                                method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity 
reducedTerms <- reduceSimMatrix(simMatrix,
                                scores = "size",
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 2004   10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 148
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
  filter(GOterm %in% reducedTerms$go)
 #add in parent terms to list of go terms 
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]

length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 85
#write.csv(go_results_BP_up_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>% mutate(Factor = "Up") 
head(cal_up_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794            7.575558e-23                        1        823
## 2 GO:0065007            9.035173e-22                        1        912
## 3 GO:0050789            1.200453e-20                        1        859
## 4 GO:0048522            2.736399e-18                        1        531
## 5 GO:0048518            2.733516e-17                        1        574
## 6 GO:0009987            1.373221e-16                        1       1074
##   numInCat                                      term ontology Module
## 1     2785            regulation of cellular process       BP   Blue
## 2     3184                     biological regulation       BP   Blue
## 3     2981          regulation of biological process       BP   Blue
## 4     1705   positive regulation of cellular process       BP   Blue
## 5     1888 positive regulation of biological process       BP   Blue
## 6     4026                          cellular process       BP   Blue
##                                                  ParentTerm Factor
## 1                            regulation of cellular process     Up
## 2                            regulation of cellular process     Up
## 3                            regulation of cellular process     Up
## 4 positive regulation of transcription by RNA polymerase II     Up
## 5 positive regulation of transcription by RNA polymerase II     Up
## 6                                          cellular process     Up

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.

#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
head(cal_down_terms)
##       GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181            1.561228e-16                        1         67
## 2 GO:0006412            2.328139e-16                        1        123
## 3 GO:0043043            3.132084e-15                        1        125
## 4 GO:0006518            5.575430e-15                        1        145
## 5 GO:0006614            1.285770e-14                        1         48
## 6 GO:0043603            2.192130e-14                        1        170
##   numInCat                                                        term ontology
## 1       90                                     cytoplasmic translation       BP
## 2      219                                                 translation       BP
## 3      231                                peptide biosynthetic process       BP
## 4      285                                   peptide metabolic process       BP
## 5       59 SRP-dependent cotranslational protein targeting to membrane       BP
## 6      359                                     amide metabolic process       BP
##      Module        ParentTerm Factor
## 1 Turquoise       translation   Down
## 2 Turquoise       translation   Down
## 3 Turquoise       translation   Down
## 4 Turquoise       translation   Down
## 5 Turquoise protein transport   Down
## 6 Turquoise       translation   Down

Merge up and down-regulation of calcification GOterms

The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification. The list has been further reduced by using the package rrvgo.

all_terms <- rbind(cal_down_terms,cal_up_terms)

all_terms <-  all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm","Module")]

all_terms$GOterm<-as.factor(all_terms$GOterm)

dim(all_terms) #2140 reduced terms 
## [1] 2140   10
length(unique(all_terms$GOterm)) #this represents 2004 unique terms between the two lists of reduced terms
## [1] 2004
length(unique(all_terms$ParentTerm)) #this represents 153 unique terms between the two lists of reduced terms
## [1] 153

Unique terms

This is collapsing the list from 2236 rows to 2004, representing the 2004 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.

goterms_shared <- all_terms %>%
  group_by(GOterm) %>%
  dplyr::summarise(
    ParentTerm = paste(unique(ParentTerm), collapse = ", "),
    Factor = paste(unique(Factor), collapse = ", "),
    term = paste(unique(term), collapse = ", ")
  )
dim(goterms_shared)
## [1] 2004    4
goterms_shared_full <- goterms_shared %>%
  left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 2004 rows back to the 2236 in all_terms
  mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
  mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
  dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
  group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
  dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
            pvalue_Down = na.omit(pvalue_Down)[1]) %>% #carry over the p-value for the term in the down direction, by taking the first non-NA value.
  rename(Factor.x = "Factor") #rename column 
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm.csv")

plots

goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()

goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")

nrow(goterms_up)
## [1] 1571
nrow(goterms_down)
## [1] 297
nrow(goterms_shared_only)
## [1] 136
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_shared_only) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 145
length(unique(goterms_down$ParentTerm))
## [1] 79
length(unique(goterms_shared_only$ParentTerm))
## [1] 42
freq_fig_up_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "red") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(color = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_down_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "blue") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) 

compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs

ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Shared_ParentTerms.pdf", plot=compare_figs, dpi=300, height=3, units="in", limitsize=FALSE)
## Saving 7 x 3 in image
freq_fig_up_pval <- ggplot(goterms_up, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "red") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(color = FALSE)

freq_fig_down_pval <- ggplot(goterms_down, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
  geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
  geom_point(size=1.5, alpha=1, colour = "blue") +
  geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
  coord_flip() +
  theme_classic() +
  ylim(0, 0.05) + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        axis.text.y = element_text(vjust=0.5, size=5), 
        axis.title.x = element_blank(),
        axis.title.y = element_blank())

compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs

ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Unique_ParentTerms.pdf", plot=compare_figs, dpi=300, height=10, units="in", limitsize=FALSE)
## Saving 7 x 10 in image

GOSlim

I downloaded the GOSlim terms list from the following website on 4/5/2024: http://www.informatics.jax.org/gotools/data/input/map2MGIslim.txt

I did so by going to this link and right clicking “here” at “Tab-delimited file mapping GO_ids to MGI GO_Slim category available for download here” and saying “Save link as…” , and I saved it as “map2MGIslim.txt”. I then converted this to CSV and uploaded to this github repository here.

Add slim terms to dataset

GOslim <- read.csv("../../data/map2MGIslim.csv") %>% dplyr::select(-c(term,aspect))
goterms_shared_full_slim <- goterms_shared_full %>%
  inner_join(GOslim, by = c("GOterm" = "GO_id"))

goterms_up_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Up" | Factor=="Down, Up")

goterms_up_full_slim_plot <- goterms_up_slim %>%
      ggplot(aes(x=pvalue_Up, 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Over-represented p-value", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text.y = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
      ); goterms_up_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim.pdf", goterms_up_full_slim_plot, width = 5, height = 35)

goterms_up_full_slim_plot <- goterms_up_slim %>%
      ggplot(aes(x=log10(pvalue_Up), 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Log 10(Over-represented p-value)", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text.y = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
      ); goterms_up_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim_Log.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_down_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Down" | Factor=="Down, Up")

goterms_down_full_slim_plot <- goterms_down_slim %>%
      ggplot(aes(x=pvalue_Down, 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Over-represented p-value", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
        legend.text=element_text(size = 5),
        legend.title = element_text(size = 5)
      ); goterms_down_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim.pdf", goterms_down_full_slim_plot, width = 5, height = 35)

goterms_down_full_slim_plot <- goterms_down_slim %>%
      ggplot(aes(x=log10(pvalue_Down), 
               y=term)) +
        geom_point(size = 0.5) +
        expand_limits(x=0) +
        facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
      labs(x="Log 10(Over-represented p-value)", y="") +
      theme_bw() + 
      theme(
        strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
        axis.text = element_text(size = 1, colour = "black"),
        axis.title.x = element_text(size = 5, colour = "black"),
        legend.text=element_text(size = 5),
        legend.title = element_text(size = 5)
      ); goterms_down_full_slim_plot

ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim_Log.pdf", goterms_down_full_slim_plot, width = 5, height = 35)

For each parent term in this list, how many are “Up” by calcification, “Down”, or both

SharedGOterms = # of GO terms within the parent term that are in the list for “Down”, “Up”, or “Down, Up”

result_parent_unique <- goterms_shared %>%
  group_by(ParentTerm,Factor) %>%
  dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
  arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
parent_filtered_up <- result_parent_unique %>%
  dplyr::filter(Factor=="Up" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 
parent_filtered_down <- result_parent_unique %>%
  dplyr::filter(Factor=="Down" | Factor=="Down, Up")
 #dplyr::filter(SharedGOterms>=5) 

Count number of GOterms by ParentTerm for the upregulation of calcification

result_up <- cal_up_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term))%>%
  mutate(Calcification.direction = "Up")

head(result_up)
## # A tibble: 6 × 3
##   ParentTerm                              Number.of.terms Calcification.direct…¹
##   <chr>                                             <int> <chr>                 
## 1 Arp2/3 complex-mediated actin nucleati…              13 Up                    
## 2 B cell differentiation                                8 Up                    
## 3 BMP signaling pathway                                19 Up                    
## 4 DNA topological change                                7 Up                    
## 5 G protein-coupled receptor signaling p…               3 Up                    
## 6 Golgi organization                                    7 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
dim(result_up)
## [1] 148   3
merged_up <- parent_filtered_up %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
  left_join(result_up, by = "ParentTerm") #join with result from above to get the Number.of.Terms column

merged_up_clean <- na.omit(merged_up)
head(merged_up_clean)
## # A tibble: 6 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 Arp2/3 complex-me…                   13              13 Up                    
## 2 B cell differenti…                    8               8 Up                    
## 3 BMP signaling pat…                   19              19 Up                    
## 4 DNA topological c…                    7               7 Up                    
## 5 G protein-coupled…                    3               3 Up                    
## 6 Golgi organization                    7               7 Up                    
## # ℹ abbreviated name: ¹​Calcification.direction
dim(merged_up_clean)
## [1] 147   4

Count number of GOterms by ParentTerm for the downregulation of calcification

result_down <- cal_down_terms %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Number.of.terms = n_distinct(term))%>%
  mutate(Calcification.direction = "Down")

dim(result_down)
## [1] 85  3
head(result_down)
## # A tibble: 6 × 3
##   ParentTerm                              Number.of.terms Calcification.direct…¹
##   <chr>                                             <int> <chr>                 
## 1 Arp2/3 complex-mediated actin nucleati…               2 Down                  
## 2 DNA topological change                                2 Down                  
## 3 G protein-coupled receptor signaling p…               2 Down                  
## 4 IRE1-mediated unfolded protein response               5 Down                  
## 5 NAD metabolic process                                 6 Down                  
## 6 NADH regeneration                                     1 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction
merged_down <- parent_filtered_down %>%
  tidyr::separate_rows(ParentTerm, sep = ", ") %>%
  dplyr::group_by(ParentTerm) %>%
  dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
  left_join(result_down, by = "ParentTerm") 

merged_down_clean<- na.omit(merged_down)
head(merged_down_clean)
## # A tibble: 6 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 Arp2/3 complex-me…                    2               2 Down                  
## 2 DNA topological c…                    2               2 Down                  
## 3 G protein-coupled…                    2               2 Down                  
## 4 IRE1-mediated unf…                    5               5 Down                  
## 5 NAD metabolic pro…                    6               6 Down                  
## 6 NADH regeneration                     1               1 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Frequency >10 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=10) %>%
  filter(Calcification.direction=="Up")
dim(cal_freq_terms_filtered_up)
## [1] 68  4

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image

Frequency >10 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=10)
  #filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 12 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 fatty acid metab…                   12              12 Down                  
##  2 intracellular si…                   12              12 Down                  
##  3 metabolic process                   15              15 Down                  
##  4 negative regulat…                   12              12 Down                  
##  5 peptidyl-serine …                   16              16 Down                  
##  6 protein transport                   16              16 Down                  
##  7 proton motive fo…                   26              26 Down                  
##  8 regulation of pr…                   13              13 Down                  
##  9 regulation of tr…                   17              17 Down                  
## 10 reproduction                        13              13 Down                  
## 11 tricarboxylic ac…                   11              11 Down                  
## 12 ubiquitin-depend…                   26              26 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

Frequency >20 up

cal_freq_terms_filtered_up <- merged_up_clean %>%
  filter(Number.of.terms>=20) %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 25 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 IRE1-mediated un…                   41              41 Up                    
##  2 anatomical struc…                   20              20 Up                    
##  3 anatomical struc…                   22              22 Up                    
##  4 cell cycle                          57              57 Up                    
##  5 cell projection …                   21              21 Up                    
##  6 chondroitin sulf…                   24              24 Up                    
##  7 fatty acid metab…                   23              23 Up                    
##  8 innate immune re…                   28              28 Up                    
##  9 intracellular ca…                   24              24 Up                    
## 10 intracellular si…                   33              33 Up                    
## # ℹ 15 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_gr20.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image

Frequency >20 down

cal_freq_terms_filtered_down <- merged_down_clean %>%
  filter(Number.of.terms>=20)
  #filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 4
##   ParentTerm         Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##   <chr>                             <int>           <int> <chr>                 
## 1 proton motive for…                   26              26 Down                  
## 2 ubiquitin-depende…                   26              26 Down                  
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_gr20.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs

All terms up

cal_freq_terms_filtered_up_all <- merged_up_clean %>%
  filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up_all
## # A tibble: 147 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 Arp2/3 complex-m…                   13              13 Up                    
##  2 B cell different…                    8               8 Up                    
##  3 BMP signaling pa…                   19              19 Up                    
##  4 DNA topological …                    7               7 Up                    
##  5 G protein-couple…                    3               3 Up                    
##  6 Golgi organizati…                    7               7 Up                    
##  7 IRE1-mediated un…                   41              41 Up                    
##  8 L-ascorbic acid …                    3               3 Up                    
##  9 Mo-molybdopterin…                    5               5 Up                    
## 10 NAD metabolic pr…                    4               4 Up                    
## # ℹ 137 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_up

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_ALL.pdf", plot=freq_fig_up, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image

All terms down

cal_freq_terms_filtered_down_all <- merged_down_clean %>%
  filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down_all
## # A tibble: 84 × 4
##    ParentTerm        Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
##    <chr>                            <int>           <int> <chr>                 
##  1 Arp2/3 complex-m…                    2               2 Down                  
##  2 DNA topological …                    2               2 Down                  
##  3 G protein-couple…                    2               2 Down                  
##  4 IRE1-mediated un…                    5               5 Down                  
##  5 NAD metabolic pr…                    6               6 Down                  
##  6 NADH regeneration                    1               1 Down                  
##  7 P granule assemb…                    1               1 Down                  
##  8 actin cytoskelet…                    1               1 Down                  
##  9 acyl-CoA metabol…                    1               1 Down                  
## 10 anatomical struc…                    4               4 Down                  
## # ℹ 74 more rows
## # ℹ abbreviated name: ¹​Calcification.direction

Figure

freq_fig_down<-ggplot(cal_freq_terms_filtered_down_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
  #facet_wrap(~Calcification.direction, nrow = 1)+
  geom_point(size=5, alpha=1, pch=21,color="black")+
  geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
  #geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
  coord_flip()+
  scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
  #scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  #scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
  scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+ 
  #scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+ 
 #scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+ 
  theme_classic()+
   theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
        plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
        panel.background= element_rect(fill=NA, color='black'),
        legend.title = element_blank(),
        axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger 
        axis.title.x = element_blank(),#making the axis title larger 
        axis.title.y = element_blank(),
        strip.text = element_text(size=12))#making the axis title larger 
freq_fig_down

ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_ALL.pdf", plot=freq_fig_down, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, ncol=2, align="h")
compare_figs